# Fit a simple linear regression of the `subway_2020` column on `month`,
# using the rows of `mta_data`.
subway_ridership <- lm(subway_2020 ~ month, data = mta_data)

# Tidy the fitted model and keep only the term name, the coefficient
# estimate and its p-value, rendered as a table rounded to 3 digits.
subway_ridership %>%
  broom::tidy() %>%
  select(term, estimate, p.value) %>%
  knitr::kable(digits = 3)
# Rendered output captured with this chunk (kept as comments so that the
# script still parses):
#> | term | estimate | p.value |
#> |---|---|---|
#> | (Intercept) | 965079.87 | 0.000 |
#> | month | 32472.74 | 0.139 |
# To plot the regression line:
# ggplot(mta_data, aes(month, subway_2020)) +
# geom_point() +
# stat_smooth(method = lm)
# Residual diagnostic for the subway model: attach the model's predictions
# and residuals to `mta_data`, then scatter `resid` against `pred`.
mta_data %>%
  modelr::add_predictions(subway_ridership) %>%
  modelr::add_residuals(subway_ridership) %>%
  ggplot(aes(x = pred, y = resid)) +
  geom_point() +
  labs(
    x = "Predicted value",
    y = "Residual"
  )

# Captured console output (kept as comments so that the script still parses).
# NOTE(review): these estimates match the `bus_ridership` table further down,
# so this looks like the tidy() printout of the bus model, whose fitting code
# is not visible in this section -- confirm.
#> # A tibble: 2 x 5
#>   term        estimate std.error statistic  p.value
#>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
#> 1 (Intercept)  717060.    64077.     11.2  4.46e-24
#> 2 month         26325.     8733.      3.01 2.82e- 3
# Tidy the bus model output and keep only the intercept and slope estimates
# together with their p-values, rendered as a table rounded to 3 digits.
# `bus_ridership` is expected to be a fitted model created outside this
# section.
bus_ridership %>%
  broom::tidy() %>%
  select(term, estimate, p.value) %>%
  knitr::kable(digits = 3)
# Rendered output captured with this chunk (kept as comments so that the
# script still parses):
#> | term | estimate | p.value |
#> |---|---|---|
#> | (Intercept) | 717060.27 | 0.000 |
#> | month | 26324.69 | 0.003 |
# To plot the regression line:
#ggplot(mta_data, aes(month, bus_2020)) +
# geom_point() +
#   stat_smooth(method = lm)

# Interactive line-and-marker chart of `value` against `month`, one colour
# per `variable`, with `text_label` supplied as the trace text.
# The x axis is restricted to the range 3..11.
plot_ttest %>%
  plot_ly(
    x = ~month, y = ~value,
    type = "scatter", mode = "lines+markers",
    color = ~variable, text = ~text_label
  ) %>%
  layout(
    title = "Monthly Average Ridership of Subway 2019 vs 2020",
    xaxis = list(title = "Months", range = c(3, 11)),
    yaxis = list(title = "Average Ridership"),
    legend = list(font = list(size = 10))
  )

# Interactive marker chart of subway `value` against `date`, one colour per
# `variable`, followed by three dotted red reference traces at fixed dates
# (labelled through `name`).
# NOTE(review): `opacity` / `alpha` inside `line = list(...)` are not among
# the documented scatter line attributes and are probably ignored; if a
# half-transparent line is intended, a trace-level `opacity` may be what is
# wanted -- confirm before changing.
plot_subway %>%
  plot_ly(
    x = ~date, y = ~value,
    type = "scatter", mode = "markers",
    color = ~variable, text = ~text_label
  ) %>%
  layout(
    title = "Subway Ridership Trends 2019 - 2020",
    xaxis = list(title = "Month/Day", tickformat = "%m/%d"), # drop year
    yaxis = list(title = "Ridership")
  ) %>%
  add_lines(
    x = as.Date("2020-03-01"),
    line = list(dash = "dot", color = "red", width = 0.5, opacity = 0.5),
    name = "First case on 3/1"
  ) %>%
  add_lines(
    x = as.Date("2020-04-07"),
    line = list(dash = "dot", color = "red", width = 0.5, alpha = 0.5),
    name = "100K cases in NYC on 04/07"
  ) %>%
  add_lines(
    x = as.Date("2020-05-26"),
    line = list(dash = "dot", color = "red", width = 0.5, alpha = 0.5),
    name = "200K cases in NYC on 05/26"
  )
# plot_bus
# Interactive marker chart of bus `value` against `date`, one colour per
# `variable`, with `text_label` supplied as the trace text.
plot_bus %>%
  plot_ly(
    x = ~date,
    y = ~value,
    type = "scatter",
    mode = "markers",
    color = ~variable,
    text = ~text_label
  ) %>%
  layout(
    title = "Bus Ridership Trends 2019 - 2020",
    # Tick labels are formatted as month/day only.
    xaxis = list(title = "Month/Day", tickformat = "%m/%d"),
    yaxis = list(title = "Ridership")
  ) %>%
  # Three dotted red reference traces at fixed dates, labelled through `name`.
  # NOTE(review): `opacity` / `alpha` inside `line = list(...)` are not among
  # the documented scatter line attributes and are probably ignored -- confirm.
  add_lines(
    x = as.Date("2020-03-01"),
    line = list(dash = "dot", color = "red", width = 0.5, opacity = 0.5),
    name = "First case on 3/1"
  ) %>%
  add_lines(
    x = as.Date("2020-04-07"),
    line = list(dash = "dot", color = "red", width = 0.5, alpha = 0.5),
    name = "100K cases in NYC on 04/07"
  ) %>%
  add_lines(
    x = as.Date("2020-05-26"),
    line = list(dash = "dot", color = "red", width = 0.5, alpha = 0.5),
    name = "200K cases in NYC on 05/26"
  )